#--------------------------------------------------#
#   Academic freedom and the onset of autocratization   #
#--------------------------------------------------#

# Title: "Academic freedom and the onset of autocratization" #
# Authors: "Pelke, Lars", FAU Erlangen-Nürnberg
# date: 2023-04-24
# journal: Democratization
# DOI: 10.1080/13510347.2023.2207213
# written under "R version 4.2.3 (2023-03-15)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# set working directory
setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

#### Libraries ####

library(tidyverse)
library(performance)
library(estimatr)
library(ggeffects)
library(ggokabeito)
library(broom)
library(marginaleffects)
library(ggpubr)
library(Amelia)

#### Load data ####

merged_data <- readRDS("data/merged_data_master.rds") 

merged_data_full <- merged_data %>%
  drop_na(v2x_polyarchy, v2xca_academ, e_pop, e_gdppc, v2xca_academ_10)

merged_data_full <- merged_data_full %>%
  drop_na(democratic_values)


#### Delete Cohorts with less than 1000 persons 

merged_data_full %>%
  group_by(cohort_5.x) %>%
  count()

merged_data_full <- merged_data_full %>%
  filter(cohort_10.x >= 1915) # delete all respondents which birth cohort is before 1910

merged_data_full <- merged_data_full %>%
  dplyr::select(-c(country_name.y, cohort_10.y)) %>%
  rename(country_name = country_name.x, 
         cohort_10 = cohort_10.x)

#### Generate country_cohort variable for calculating std. err. ####

merged_data_full <- merged_data_full %>%
  mutate(country_cohort = str_c(country_text_id, cohortmatch5_20, sep = "_"))

table(merged_data_full$country_cohort)

#################################
#Analysis
##################################

## Model 1 without macro-level vars##
m1 <- lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                  unemployed, 
                fixed_effects = ~ year + country_id + cohortmatch5_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_full)
summary(m1)

## Model 2 ##
m2 <- lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_5 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                fixed_effects = ~ year + country_id + cohortmatch5_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_full)
summary(m2)


## Model 3 ##
m3 <- lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_5 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                  graduate:v2xca_academ_5 + as.factor(year) + as.factor(cohortmatch5_20), 
                fixed_effects = ~ country_id,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_full)
summary(m3)

options(modelsummary_factory_html = "kableExtra")

modelsummary::modelsummary(list(m1, m2, m3),
                           output = "results/Table1_5.tex", 
                           fmt = 3,
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_omit = "year|country_id|cohortmatch5_20", 
                           coef_rename = c("graduate" = "Graduate", 
                                           "sex" = "Sex", 
                                           "age" = "Age", 
                                           "income_deciles" = "Income (ten cat.)", 
                                           "children" = "Children", 
                                           "unemployed" = "Unemployed", 
                                           "v2xca_academ_5" = "Academic_Freedom at C", 
                                           "v2x_polyarchy" = "Electoral Democracy Score", 
                                           "log10(e_gdppc)" = "GDP pc(log)", 
                                           "log10(e_pop)" = "Population (log)", 
                                           "graduate:v2xca_academ_5" = "Academic Freedom at C * Graduate"))


modelsummary::modelsummary(list(m1, m2, m3),
                           output = "results/Table1_5.docx", 
                           fmt = 3,
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_omit = "year|country_id|cohortmatch5_20", 
                           coef_rename = c("graduate" = "Graduate", 
                                           "sex" = "Sex", 
                                           "age" = "Age", 
                                           "income_deciles" = "Income (ten cat.)", 
                                           "children" = "Children", 
                                           "unemployed" = "Unemployed", 
                                           "v2xca_academ_5" = "Academic_Freedom at C", 
                                           "v2x_polyarchy" = "Electoral Democracy Score", 
                                           "log10(e_gdppc)" = "GDP pc(log)", 
                                           "log10(e_pop)" = "Population (log)", 
                                           "graduate:v2xca_academ_5" = "Academic Freedom at C * Graduate"))


#### Figure 2: Coefficient Plot ####


b <- list(geom_vline(xintercept = 0, color = 'orange'),
          annotate("rect", alpha = .1,
                   xmin = -.5, xmax = .5, 
                   ymin = -Inf, ymax = Inf),
          geom_point(aes(y = term, x = estimate), alpha = .3, 
                     size = 10, color = 'red'))

modelsummary::modelplot(m3, coef_omit = 'Intercept|year|country_id|cohortmatch5_20', background = b)

##Simulations for data 

merged_data_full$cohortmatch5_20 <- as.factor(merged_data_full$cohortmatch5_20)
merged_data_full$year <- as.factor(merged_data_full$year)


## Model 3 ##
m3_sim <- lm(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
               unemployed + v2xca_academ_5 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
               graduate:v2xca_academ_5 + year + cohortmatch5_20 + country_text_id, 
             data = merged_data_full)
summary(m3_sim)


m3_pred <- ggpredict(m3_sim, terms = c("v2xca_academ_5[all]", "graduate[0,1]"),
                     type = "fe", condition = c(cohortmatch5_20 = 1965, country_text_id = "ARG"), 
                     cov.fun = "vcovCL", 
                     vcov.type = "HC1",
                     vcov.args = list(cluster = merged_data_full$country_cohort))

f1b <- ggplot(m3_pred, aes(x = x, y = predicted, group = group, color = group, fill = group)) +
  geom_line() +
  geom_ribbon(aes(ymin= conf.low, ymax = conf.high), alpha = 0.3) +
  theme_bw() +
  ggtitle("") +
  labs(color= "Graduate", fill = "Graduate",  
       y= "Predicted Democratic Support",
       x = "Academic Freedom at University Socialization") + 
  theme(legend.position="bottom") +
  scale_color_okabe_ito(order = c(2, 6)) +
  scale_fill_okabe_ito(order = c(2, 6)) 


#### Predict Marginal effects ####

margin_m3 <- marginaleffects(m3_sim,  variables = "v2xca_academ_5", vcov = ~country_cohort,
                             newdata = datagrid(graduate = c(0,1)))

margin_m3$graduate <- as.factor(margin_m3$graduate)
levels(margin_m3$graduate) <- c("Non-Graduate", "Graduate")

f1a <- ggplot(margin_m3, aes(x = graduate, y = dydx, group = graduate, color = graduate, fill = graduate)) +
  geom_point() +
  geom_linerange(aes(ymin = conf.low, ymax=  conf.high)) + 
  theme_bw() +
  ggtitle("") +
  labs(color= "Graduate", fill = "Graduate",  
       y= "Average Marinal Effect of Academic Freedom at C",
       x = "Graduate") + 
  theme(legend.position="bottom") +
  scale_color_okabe_ito(order = c(2, 6)) +
  scale_fill_okabe_ito(order = c(2, 6)) 


ggarrange(f1a, f1b)


ggsave("figures/Figure_2_5.pdf", width = 25,
       height = 15,
       units = c("cm"), dpi = 1000)

#################################################################################

m3_pred <- ggpredict(m3_sim, terms = c("v2xca_academ_5[all]", "graduate[0,1]", "cohortmatch5_20[all]"),
                     type = "fe", 
                     cov.fun = "vcovCL", 
                     vcov.type = "HC1",
                     vcov.args = list(cluster = merged_data_full$country_cohort))

f2 <- ggplot(m3_pred, aes(x = x, y = predicted, group = group, color = group, fill = group)) +
  geom_line() +
  geom_ribbon(aes(ymin= conf.low, ymax = conf.high), alpha = 0.3) +
  theme_bw() +
  ggtitle("") +
  facet_wrap(vars(facet), nrow = 4) +
  labs(color= "Graduate", fill = "Graduate",  
       y= "Predicted Democratic Support",
       x = "Academic Freedom at University Socialization") + 
  theme(legend.position="bottom") +
  scale_color_okabe_ito(order = c(2, 6)) +
  scale_fill_okabe_ito(order = c(2, 6)) 


ggsave(plot = f2, "figures/Figure_3_5.pdf", width = 20,
       height = 20,
       units = c("cm"), dpi = 1000)


############################################################################################
############################################################################################

#### Separate Indicator instead of Index ####

#### Load data ####

merged_data <- readRDS("data/merged_data_master.rds") 

merged_data_full <- merged_data %>%
  drop_na(v2x_polyarchy, v2xca_academ, e_pop, e_gdppc, v2xca_academ_10)

#### Generate country_cohort variable for calculating std. err. ####

merged_data_full <- merged_data_full %>%
  mutate(country_cohort = str_c(country_text_id, cohortmatch5_20, sep = "_"))

merged_data_army_rule <- merged_data_full %>%
  drop_na(army_rule)

merged_data_strong_leader <- merged_data_full %>%
  drop_na(strong_leader)

merged_data_expert_gov <- merged_data_full %>%
  drop_na(expert_gov)

merged_data_demo_system <- merged_data_full %>%
  drop_na(demo_system)


#### Delete Cohorts with less than 1000 persons 

merged_data_army_rule %>%
  group_by(cohort_5.x) %>%
  count()

merged_data_army_rule <- merged_data_army_rule %>%
  filter(cohort_10.x >= 1915) # delete all respondents which birth cohort is before 1910

merged_data_army_rule <- merged_data_army_rule %>%
  dplyr::select(-c(country_name.y, cohort_10.y)) %>%
  rename(country_name = country_name.x, 
         cohort_10 = cohort_10.x)

merged_data_strong_leader %>%
  group_by(cohort_5.x) %>%
  count()

merged_data_strong_leader <- merged_data_strong_leader %>%
  filter(cohort_10.x >= 1915) # delete all respondents which birth cohort is before 1910

merged_data_strong_leader <- merged_data_strong_leader %>%
  dplyr::select(-c(country_name.y, cohort_10.y)) %>%
  rename(country_name = country_name.x, 
         cohort_10 = cohort_10.x)


merged_data_expert_gov %>%
  group_by(cohort_5.x) %>%
  count()

merged_data_expert_gov <- merged_data_expert_gov %>%
  filter(cohort_10.x >= 1915) # delete all respondents which birth cohort is before 1910

merged_data_expert_gov <- merged_data_expert_gov %>%
  dplyr::select(-c(country_name.y, cohort_10.y)) %>%
  rename(country_name = country_name.x, 
         cohort_10 = cohort_10.x)

merged_data_demo_system %>%
  group_by(cohort_5.x) %>%
  count()

merged_data_demo_system <- merged_data_demo_system %>%
  filter(cohort_10.x >= 1915) # delete all respondents which birth cohort is before 1910

merged_data_demo_system <- merged_data_demo_system %>%
  dplyr::select(-c(country_name.y, cohort_10.y)) %>%
  rename(country_name = country_name.x, 
         cohort_10 = cohort_10.x)


#################################
#Analysis
##################################

#### Army Rule ####
table(merged_data_army_rule$army_rule)

## Model 1 without macro-level vars##
m1 <- lm_robust(army_rule ~  graduate + sex + age + income_deciles + children +
                  unemployed, 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_army_rule)
summary(m1)

## Model 2 ##
m2 <- lm_robust(army_rule ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_army_rule)
summary(m2)


## Model 3 ##
m3 <- lm_robust(army_rule ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                  graduate:v2xca_academ_10 + as.factor(year) + as.factor(cohortmatch10_20), 
                fixed_effects = ~ country_id,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_army_rule)
summary(m3)

#### Strong leader ####
table(merged_data_strong_leader$strong_leader)

## Model 1 without macro-level vars##
m4 <- lm_robust(strong_leader ~  graduate + sex + age + income_deciles + children +
                  unemployed, 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_strong_leader)
summary(m4)

## Model 2 ##
m5 <- lm_robust(strong_leader ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_strong_leader)
summary(m5)


## Model 3 ##
m6 <- lm_robust(strong_leader ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                  graduate:v2xca_academ_10 + as.factor(year) + as.factor(cohortmatch10_20), 
                fixed_effects = ~ country_id,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_strong_leader)
summary(m6)


#### expert government ####
table(merged_data_expert_gov$expert_gov)

## Model 1 without macro-level vars##
m7 <- lm_robust(expert_gov ~  graduate + sex + age + income_deciles + children +
                  unemployed, 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_expert_gov)
summary(m7)

## Model 2 ##
m8 <- lm_robust(expert_gov ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_expert_gov)
summary(m8)


## Model 3 ##
m9 <- lm_robust(expert_gov ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                  graduate:v2xca_academ_10 + as.factor(year) + as.factor(cohortmatch10_20), 
                fixed_effects = ~ country_id,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_expert_gov)
summary(m9)

#### Democratic System  ####
table(merged_data_demo_system$demo_system)

## Model 1 without macro-level vars##
m10 <- lm_robust(demo_system ~  graduate + sex + age + income_deciles + children +
                  unemployed, 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_demo_system)
summary(m10)

## Model 2 ##
m11 <- lm_robust(demo_system ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_demo_system)
summary(m11)


## Model 3 ##
m12 <- lm_robust(demo_system ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                  graduate:v2xca_academ_10 + as.factor(year) + as.factor(cohortmatch10_20), 
                fixed_effects = ~ country_id,
                clusters = country_cohort, se_type = "stata", 
                data = merged_data_demo_system)
summary(m12)


options(modelsummary_factory_html = "kableExtra")

modelsummary::modelsummary(list(m2, m3, m5, m6, m8, m9, m11, m12),
                           output = "results/TableA2.tex", 
                           fmt = 3,
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_omit = "year|country_id|cohortmatch10_20", 
                           coef_rename = c("graduate" = "Graduate", 
                                           "sex" = "Sex", 
                                           "age" = "Age", 
                                           "income_deciles" = "Income (ten cat.)", 
                                           "children" = "Children", 
                                           "unemployed" = "Unemployed", 
                                           "v2xca_academ_10" = "Academic_Freedom at C", 
                                           "v2x_polyarchy" = "Electoral Democracy Score", 
                                           "log10(e_gdppc)" = "GDP pc(log)", 
                                           "log10(e_pop)" = "Population (log)", 
                                           "graduate:v2xca_academ_10" = "Academic Freedom at C * Graduate"))

#################################################################################
#################################################################################

#### AMELIA IMPUTATION ####

merged_dataamelia <- merged_data %>%
  mutate(country_cohort = str_c(country_text_id, cohortmatch5_20, sep = "_")) %>%
  dplyr::select(country_cohort, cohortmatch10_20, year, country_text_id, graduate, sex, age, income_deciles, children, 
                unemployed,v2xca_academ_10, v2x_polyarchy, e_gdppc, e_pop, democratic_values_normalized)

merged_dataamelia$year <- as.numeric(merged_dataamelia$year)
class(merged_dataamelia)
merged_dataamelia <- as.data.frame(merged_dataamelia)


a.merged_dataamelia <- amelia(merged_dataamelia, 
                              ts= "year", cs = "country_cohort", 
                    idvars=c("cohortmatch10_20", "country_text_id",
                                         "v2xca_academ_10", "v2x_polyarchy", "e_gdppc", "e_pop"), m=10, 
                    ords=c("graduate", "sex", "age", "income_deciles", "children", 
                           "unemployed"), 
                    p2s=0)
summary(a.merged_dataamelia)
summary(a.merged_dataamelia$imputations[[1]])


#### Analysis ####


## Build one data frame for regression analysis ##

all_imputations <- bind_rows(unclass(a.merged_dataamelia$imputations), .id = "m") %>%
  group_by(m) %>%
  nest()

all_imputations

## Model 1

v1 <- all_imputations %>%
  mutate(model = data %>% map(~lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                                           unemployed, 
                                         fixed_effects = ~ year + country_text_id + cohortmatch10_20,
                                         clusters = country_cohort, se_type = "stata", 
                                         data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v1

#Inspect the data for imputation dataset 1
v1 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v1_par <- v1 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v1_par <- v1_par %>%
  select(-obs)

v1_par

# Extract the coefficients
v1_coefs <- v1_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v1_coefs

# Extract the standard errors
v1_ses <- v1_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)


# Mi meld function Amelia
v1_melded <- mi.meld(v1_coefs, v1_ses)
v1_melded 

# Regression coefficient table
v1_model_dg <- v1 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v1_table <- as.data.frame(cbind(t(v1_melded$q.mi),
                                t(v1_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything(.)) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v1_model_dg),
         conf.high = estimate + std.error * qt(0.975, v1_model_dg),
         p.value = 2 * pt(abs(statistic), v1_model_dg, lower.tail = FALSE))

v1_table

v1_data <- v1_table %>%
  drop_na()


## Model 2

v2 <- all_imputations %>%
  mutate(model = data %>% map(~lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                                           unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                                         fixed_effects = ~ year + country_text_id + cohortmatch10_20,
                                         clusters = country_cohort, se_type = "stata", 
                                         data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v2

#Inspect the data for imputation dataset 1
v2 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v2_par <- v2 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v2_par <- v2_par %>%
  select(-obs)

v2_par

# Extract the coefficients
v2_coefs <- v2_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v2_coefs

# Extract the standard errors
v2_ses <- v2_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)


# Mi meld function Amelia
v2_melded <- mi.meld(v2_coefs, v2_ses)
v2_melded 

# Regression coefficient table
v2_model_dg <- v2 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v2_table <- as.data.frame(cbind(t(v2_melded$q.mi),
                                t(v2_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything(.)) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v2_model_dg),
         conf.high = estimate + std.error * qt(0.975, v2_model_dg),
         p.value = 2 * pt(abs(statistic), v2_model_dg, lower.tail = FALSE))

v2_table

v2_data <- v2_table %>%
  drop_na()


## Model 3

v3 <- all_imputations %>%
  mutate(model = data %>% map(~lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                                           unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                                           graduate:v2xca_academ_10 + as.factor(year) + as.factor(cohortmatch10_20), 
                                         fixed_effects = ~ country_text_id,
                                         clusters = country_cohort, se_type = "stata", 
                                         data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
v3

#Inspect the data for imputation dataset 1
v3 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v3_par <- v3 %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v3_par <- v3_par %>%
  select(-obs)

v3_par

# Extract the coefficients
v3_coefs <- v3_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  select(-m, -key)
v3_coefs

# Extract the standard errors
v3_ses <- v3_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  select(-m, -key)


# Mi meld function Amelia
v3_melded <- mi.meld(v3_coefs, v3_ses)
v3_melded 

# Regression coefficient table
v3_model_dg <- v3 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v3_table <- as.data.frame(cbind(t(v3_melded$q.mi),
                                t(v3_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything(.)) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v3_model_dg),
         conf.high = estimate + std.error * qt(0.975, v3_model_dg),
         p.value = 2 * pt(abs(statistic), v3_model_dg, lower.tail = FALSE))

v3_table

v3_data <- v3_table %>%
  drop_na()


## Extract Texreg Table ##

extract_broom <- function(tidy_model, glance_model) {
  # get estimates/standard errors from tidy
  coef <- tidy_model$estimate
  coef.names <- as.character(tidy_model$term)
  se <- tidy_model$std.error
  pvalues <- tidy_model$p.value
  # get goodness-of-fit statistics from glance
  glance_transposed <- as_tibble(cbind(name = names(glance_model), t(glance_model)))
  gof.names <- as.character(glance_transposed$name)
  gof <- as.double(glance_transposed$value)
  gof.decimal <- gof %% 1 > 0
  tr_object <- texreg::createTexreg(coef.names = coef.names,
                                    coef = coef,
                                    se = se,
                                    pvalues = pvalues,
                                    gof.names = gof.names,
                                    gof = gof,
                                    gof.decimal = gof.decimal)
  return(tr_object)
}

extract_broom <- function(tidy_model, glance_model) {
  # get estimates/standard errors from tidy
  coef <- tidy_model$estimate
  coef.names <- as.character(tidy_model$term)
  se <- tidy_model$std.error
  pvalues <- tidy_model$p.value
  tr_object <- texreg::createTexreg(coef.names = coef.names,
                                    coef = coef,
                                    se = se,
                                    pvalues = pvalues)
  return(tr_object)
}

v1_tex <- extract_broom(v1_data)
v2_tex <- extract_broom(v2_data)
v3_tex <- extract_broom(v3_data)

library(texreg)
texreg(list(v1_tex, v2_tex, v3_tex), 
       head.tag = TRUE, body.tag = TRUE,
       digits = 3,
       omit.coef=c('country_text_id|year|cohortmatch10_20'), 
       custom.coef.names = c("Age", 
                             "Children", 
                             "Graduate", 
                             "Income (ten. cat.)", 
                             "Sex", 
                             "Unemployed", 
                             "GDP pc(log)", 
                             "Population (log)", 
                             "Electoral democracy Score", 
                             "Academic Freedom at C", 
                             "Academic Freedom at C * Graduate"),
       
       booktabs = TRUE,
       use.packages = FALSE,
       caption = "Estimated effect of academic freedom during early adulthood for graduates and non-graduates",
       fontsize = "scriptsize",
       label = "A3")




####Manually fill additional information regression ####

## Predicting AIC; BIC; and Log Likelihood##

## Function written by Andrew Heiss 
##(https://www.andrewheiss.com/blog/2018/03/08/amelia-broom-huxtable/)

tidy.melded <- function(x, conf.int = FALSE, conf.level = 0.95) {
  # Get the df from one of the models
  model_degrees_freedom <- glance(x[[1]])$df.residual
  
  # Create matrices of the estimates and standard errors
  params <- data_frame(models = x) %>%
    mutate(m = 1:n(),
           tidied = models %>% map(tidy)) %>% 
    unnest(tidied) %>%
    select(m, term, estimate, std.error) %>%
    gather(key, value, estimate, std.error) %>%
    mutate(term = fct_inorder(term)) %>%  # Order the terms so that spread() keeps them in order
    spread(term, value)
  
  just_coefs <- params %>% filter(key == "estimate") %>% select(-m, -key)
  just_ses <- params %>% filter(key == "std.error") %>% select(-m, -key)
  
  # Meld the coefficients with Rubin's rules
  coefs_melded <- mi.meld(just_coefs, just_ses)
  
  # Create tidy output
  output <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                t(coefs_melded$se.mi))) %>%
    magrittr::set_colnames(c("estimate", "std.error")) %>%
    mutate(term = rownames(.)) %>%
    select(term, everything()) %>%
    mutate(statistic = estimate / std.error,
           p.value = 2 * pt(abs(statistic), model_degrees_freedom, lower.tail = FALSE))
  
  # Add confidence intervals if needed
  if (conf.int & conf.level) {
    # Convert conf.level to tail values (0.025 when it's 0.95)
    a <- (1 - conf.level) / 2
    
    output <- output %>% 
      mutate(conf.low = estimate + std.error * qt(a, model_degrees_freedom),
             conf.high = estimate + std.error * qt((1 - a), model_degrees_freedom))
  }
  
  # tidy objects only have a data.frame class, not tbl_df or anything else
  class(output) <- "data.frame"
  output
}

glance.melded <- function(x) {
  # Because the properly melded parameters and the simple average of the
  # parameters of these models are roughly the same (see
  # https://www.andrewheiss.com/blog/2018/03/07/amelia-tidy-melding/), for the
  # sake of simplicty we just take the average here
  output <- data_frame(models = x) %>%
    mutate(glance = models %>% map(glance)) %>%
    unnest(glance) %>%
    summarize_at(vars(logLik, AIC, BIC),
                 funs(mean)) %>%
    mutate(m = as.integer(length(x)))
  
  # glance objects only have a data.frame class, not tbl_df or anything else
  class(output) <- "data.frame"
  output
}

nobs.melded <- function(x, ...) {
  # Take the number of observations from the first model
  nobs(x[[1]])
}

####

# Extract the models into a vector and make it a "melded" class
v1_imputed <- v1$model
v2_imputed <- v2$model
v3_imputed <- v3$model


# Without this, R won't use our custom tidy.melded() or glance.melded() functions
class(v1_imputed) <- "melded"
class(v2_imputed) <- "melded"
class(v3_imputed) <- "melded"

################################################################################
################################################################################

#### Clustered Standard Errors at country-year instead of country-cohort ####


#### Load data ####

merged_data <- readRDS("data/merged_data_master.rds") 

merged_data_full <- merged_data %>%
  drop_na(v2x_polyarchy, v2xca_academ, e_pop, e_gdppc, v2xca_academ_10)

merged_data_full <- merged_data_full %>%
  drop_na(democratic_values)


#### Delete Cohorts with less than 1000 persons 

merged_data_full %>%
  group_by(cohort_10.x) %>%
  count()

merged_data_full <- merged_data_full %>%
  filter(cohort_10.x >= 1915) # delete all respondents which birth cohort is before 1910

merged_data_full <- merged_data_full %>%
  dplyr::select(-c(country_name.y, cohort_10.y)) %>%
  rename(country_name = country_name.x, 
         cohort_10 = cohort_10.x)

#### Generate country_cohort variable for calculating std. err. ####

merged_data_full <- merged_data_full %>%
  mutate(country_cohort = str_c(country_text_id, cohortmatch10_20, sep = "_"), 
         country_year =  str_c(country_text_id, year, sep = "_"))

table(merged_data_full$country_cohort)
table(merged_data_full$country_year)


#################################
#Analysis
##################################

## Model 1 without macro-level vars##
m1 <- lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                  unemployed, 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_year, se_type = "stata", 
                data = merged_data_full)
summary(m1)

## Model 2 ##
m2 <- lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) , 
                fixed_effects = ~ year + country_id + cohortmatch10_20,
                clusters = country_year, se_type = "stata", 
                data = merged_data_full)
summary(m2)


## Model 3 ##
m3 <- lm_robust(democratic_values_normalized ~  graduate + sex + age + income_deciles + children +
                  unemployed + v2xca_academ_10 + v2x_polyarchy + log10(e_gdppc) + log10(e_pop) + 
                  graduate:v2xca_academ_10 + as.factor(year) + as.factor(cohortmatch10_20), 
                fixed_effects = ~ country_id,
                clusters = country_year, se_type = "stata", 
                data = merged_data_full)
summary(m3)

options(modelsummary_factory_html = "kableExtra")

modelsummary::modelsummary(list(m1, m2, m3),
                           output = "results/Table_A4.tex", 
                           fmt = 3,
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_omit = "year|country_id|cohortmatch10_20", 
                           coef_rename = c("graduate" = "Graduate", 
                                           "sex" = "Sex", 
                                           "age" = "Age", 
                                           "income_deciles" = "Income (ten cat.)", 
                                           "children" = "Children", 
                                           "unemployed" = "Unemployed", 
                                           "v2xca_academ_10" = "Academic_Freedom at C", 
                                           "v2x_polyarchy" = "Electoral Democracy Score", 
                                           "log10(e_gdppc)" = "GDP pc(log)", 
                                           "log10(e_pop)" = "Population (log)", 
                                           "graduate:v2xca_academ_10" = "Academic Freedom at C * Graduate"))




